knitr::opts_chunk$set(echo = TRUE)
# devtools::install_github("thebioengineer/tidytuesdayR")
# devtools::install_github('thomasp85/gganimate')
options(scipen = 9999.9)
if (!grepl('2020_26', getwd()))
setwd('./2020_26_caribou')
library(tidytuesdayR)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sp)
library(gganimate)
raw <- tuesdata <- tidytuesdayR::tt_load('2020-06-23')
## --- Compiling #TidyTuesday Information for 2020-06-23 ----
## --- There are 2 files available ---
## --- Starting Download ---
##
## Downloading file 1 of 2: `locations.csv`
## Downloading file 2 of 2: `individuals.csv`
## --- Download complete ---
ind <- raw$individuals
#filter the location data to a representative 3 years
#add year and month
loc <- raw$locations %>%
mutate(datestamp = as.Date(timestamp),
year = lubridate::year(datestamp)) %>%
filter(year %in% c(2011, 2012, 2013)) %>%
left_join(ind %>% select(animal_id, sex))
## Joining, by = "animal_id"
#find the top animals by number of observations
top_bou <- loc %>% group_by(animal_id) %>%
count() %>%
arrange(desc(n)) %>%
ungroup() %>%
top_n(5, n)
#filter the loc data for the top animals
top_bou_pts <- loc %>% filter(animal_id %in% top_bou$animal_id) %>%
left_join(ind %>% select(animal_id, sex)) %>%
arrange(datestamp)
## Joining, by = c("animal_id", "sex")
#get a bounding box based on the lat and lon ranges in the data
bc_box <-
ggmap::make_bbox(data = loc, lon = longitude, lat = latitude)
#download map tiles - stamen watercolor is pretty
bc_stamen <- ggmap::get_stamenmap(
maptype = 'watercolor',
bbox = bc_box,
# bbox = c(left = -124.1616,bottom = 54, right = -119.75, top = 56),
crop = TRUE,
zoom = 7
)
## Source : http://tile.stamen.com/watercolor/7/19/39.jpg
## Source : http://tile.stamen.com/watercolor/7/20/39.jpg
## Source : http://tile.stamen.com/watercolor/7/21/39.jpg
## Source : http://tile.stamen.com/watercolor/7/19/40.jpg
## Source : http://tile.stamen.com/watercolor/7/20/40.jpg
## Source : http://tile.stamen.com/watercolor/7/21/40.jpg
#plot it. stat_density2d creates the heatmap
bou_map <- ggmap::ggmap(bc_stamen) +
stat_density2d(
aes(
x = longitude,
y = latitude,
# color=study_site,
alpha = stat(level)
),
fill = "darkblue",
geom = "polygon",
bins = 20,
data = loc,
show.legend = FALSE
) +
#Add in the line data for the top five beasts
geom_line(
data = subset(loc, animal_id %in% top_bou$animal_id),
aes(x = longitude, y = latitude, color = animal_id)
) +
facet_grid(
rows = vars(year),
cols = vars(season),
shrink = TRUE,
switch = "y"
) +
theme_void() +
theme(legend.position = "bottom") +
labs(
colour = "Animal ID",
title = "Range of Caribou in British Columbia",
subtitle = "including path data for top 5 'bou",
caption = "Seip DR, Price E (2019) Data from: Science update for the South Peace Northern Caribou\n(Rangifer tarandus caribou pop. 15) in British Columbia.\nMovebank Data Repository. https://doi.org/10.5441/001/1.p5bn656k\n
viz: @WireMonkey Alyssa Goldberg\n
TidyTuesday2020 week 26"
) +
# scale_color_viridis_d()+
NULL
ggsave("caribou_range.png",width = 11, height = 11)
bou_map

library(geosphere)
#calculate distances
#https://stackoverflow.com/questions/49532911/calculate-distance-longitude-latitude-of-multiple-in-dataframe-r
#group and arrange the data by animal and timestamp. using lead (could also use lag) find the distance between the points with geosphere::distHaversine(), remove any null distances (indicates the start of a new animal id)
bou_dist <- loc %>%
distinct() %>%
group_by(animal_id) %>%
arrange(animal_id, desc(timestamp)) %>%
mutate(mnth = lubridate::month(datestamp)) %>%
mutate(distance = distHaversine(cbind(longitude, latitude),
cbind(lead(longitude), lead(latitude)))) %>%
filter(!is.na(distance))
#Race 'em! For efficiency take a subset
bou_race <- bou_dist %>%
filter(season == "Summer",
year == 2011,
mnth == 7) %>%
ungroup() %>%
group_by(animal_id) %>%
arrange(datestamp) %>%
mutate(tot_dist = cumsum(distance))
#find the top animals by distance covered
top_racers <- bou_race %>%
group_by(animal_id) %>%
filter(tot_dist == max(tot_dist)) %>%
ungroup() %>%
top_n(10, tot_dist)
#filter the data to include only the top racers
bou_race_df <-
bou_race %>%
filter(animal_id %in% top_racers$animal_id) %>%
select(animal_id, datestamp, tot_dist) %>%
ungroup() %>%
mutate(animal_id = fct_reorder(animal_id, .fun = sum, tot_dist))
p <- bou_race_df %>%
ggplot(., aes(x = animal_id, y = tot_dist/1000, color = animal_id)) +
geom_line(show.legend = FALSE) +
geom_point(size = 3, show.legend = FALSE) +
labs(x="", y="distance km")+
coord_flip() +
transition_reveal(datestamp) +
shadow_mark(alpha = FALSE, size = 1)
animate(p, fps = 25, duration = 20, width = 800, height = 600, end_pause = 100)
